# -*- coding:utf-8 -*-
# @Time : 2021/2/23 14:29
# @Author : Administrator
# @File : ershoufang.py
# @Software: PyCharm
# @Motto: good good study,day day up

import requests
from parsel import Selector
import pymongo
from loguru import logger
from pymongo import UpdateOne

# MongoDB target: database "spiders", collection "ershoufang" on the local server.
myclient = pymongo.MongoClient("mongodb://localhost:27017/")
mydb = myclient["spiders"]
mycol = mydb["ershoufang"]
# Buffer of parsed listings for the page currently being processed; it is
# flushed to MongoDB with a single insert_many call per page.
items_list = []

# Seconds to wait for the server before giving up on a page, so that one
# stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10

# The request headers never change, so build them once instead of per page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'
}


def _first_text(node, css_class):
    """Return the first text node under the div with class *css_class*.

    Returns None when *node* contains no such div or the div has no text.
    """
    return node.xpath(f'.//div[@class="{css_class}"]//text()').extract_first()


def _parse_listing(node):
    """Convert one ``<li>`` selector into a document dict for MongoDB.

    Returns None when the node has no title text; such ``<li>`` elements are
    skipped by the caller. Any other field missing from the markup is stored
    as None instead of raising, so a single malformed listing cannot abort
    the crawl.
    """
    title = _first_text(node, 'title')
    if not title:
        return None

    # The script reads text node 0 as the community and text node 2 as the
    # address; node 1 is presumably a separator between them (TODO confirm
    # against the live markup).
    position = node.xpath('.//div[@class="positionInfo"]//text()').extract()
    community = position[0] if len(position) > 0 else None
    address = position[2] if len(position) > 2 else None

    total_price = _first_text(node, 'totalPrice')
    unit_price = _first_text(node, 'unitPrice')

    return {
        'title': title,
        'community': community,
        'address': address,
        'houseInfo': _first_text(node, 'houseInfo'),
        'followInfo': _first_text(node, 'followInfo'),
        # Append the '万' unit suffix only when a price was actually found.
        'totalPrice': (total_price + '万') if total_price is not None else None,
        # Strip the '单价' label that prefixes the unit price text.
        'unitPrice': unit_price.replace('单价', '') if unit_price is not None else None,
    }


# Crawl listing pages 1..100 and store every parsed listing.
for page in range(1, 101):
    url = f'https://cs.lianjia.com/ershoufang/pg{page}/'
    logger.info(f'正在下载{url}网页中的数据.....')
    try:
        response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Treat HTTP error statuses as failures rather than silently parsing
        # an error page that contains no listings.
        response.raise_for_status()
    except requests.RequestException as exc:
        logger.warning(f'Skipping {url}: {exc}')
        continue

    selector = Selector(response.text)
    for node in selector.xpath('//ul[@class="sellListContent"]//li'):
        record = _parse_listing(node)
        if record is not None:
            items_list.append(record)

    # One bulk write per page instead of one round trip per listing.
    # insert_many rejects an empty document list, hence the guard.
    if items_list:
        mycol.insert_many(items_list)
        items_list.clear()
    logger.info('写入完成......')